#!/usr/bin/python
import re

__author__ = 'meirdrago'

#	Extract by regex into dict defined by fields list
#	
#	Example : regex = '\[error\].+:\s+([^:]+?) in /[^\s]+/([^/]+\.php).+line\s+(\d+)' , fields = [errdesc,class,line]
#	INPUT :
#	2013/12/17 01:36:14 [error] 1534#0: *1007248619 FastCGI sent in stderr: "PHP message: PHP Warning:  json_encode(): Invalid UTF-8 sequence in argument in /facemoods/web/start.facemoods.com/application/classes/ICLogger.php on line 39
#
#	OUTPUT :
#	{errdesc:'Invalid UTF-8 sequence in argument' , class:'ICLogger.php' , line:'39'}

class Parser:
	"""Extract regex capture groups from a line into a dict keyed by field name.

	The i-th capture group of the pattern is stored under the i-th name
	in ``fields``; the number of groups must equal the number of fields.
	"""

	# Class-level defaults; both are overwritten per instance in __init__.
	regex 	= None
	fields	= None

	def __init__(self, regex, fields):
		"""Compile the pattern and remember the field names.

		regex  -- pattern string; every doubled backslash in it is collapsed
		          to a single backslash before compilation.
		fields -- sequence of field names, one per capture group, in order.
		"""
		# NOTE(review): the collapse presumably undoes escaping applied by
		# wherever the pattern is stored (e.g. a config file) -- confirm.
		# It also means a pattern cannot express a literal backslash match.
		self.regex 	= re.compile(regex.replace('\\\\','\\'))
		self.fields 	= fields

	def do_parse(self, line):
		"""Return a dict of field name -> captured text for the first match.

		Returns an empty dict when the pattern does not match ``line`` or
		when its number of capture groups differs from ``len(self.fields)``.
		"""
		match = self.regex.search(line)
		if match is None:
			return {}
		# groups() is always a tuple, whatever the number of capture groups.
		# findall() yields bare strings for patterns with fewer than two
		# groups, which made single-group parsers compare the length of the
		# captured text against the field count.  The '' default keeps the
		# value findall() reports for optional groups that did not
		# participate in the match.
		groups = match.groups('')
		if len(groups) != len(self.fields):
			return {}
		return dict(zip(self.fields, groups))


